Purpose

This document summarizes Rick Gilmore’s analysis of participant sorting data using graph and network analysis tools.

Set-up

Import data

Jaccard indices

The Jaccard index data are found in analysis/data/jaccard.csv.

jaccard_raw <- readr::read_csv("analysis/data/jaccard.csv")
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   Exemplar.Row = col_double(),
##   Exemplar.Col = col_double(),
##   Jaccard = col_double(),
##   Group = col_character()
## )
str(jaccard_raw)
## spec_tbl_df [950 × 4] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ Exemplar.Row: num [1:950] 1 1 1 1 1 1 1 1 1 1 ...
##  $ Exemplar.Col: num [1:950] 2 2 2 2 2 3 3 3 3 3 ...
##  $ Jaccard     : num [1:950] 0.0476 0.1186 0.1228 0.2 0.2692 ...
##  $ Group       : chr [1:950] "P31M" "P3M1" "P6M" "P6" ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   Exemplar.Row = col_double(),
##   ..   Exemplar.Col = col_double(),
##   ..   Jaccard = col_double(),
##   ..   Group = col_character()
##   .. )

It’s probably wise to reorder the data frame by wallpaper group, Jaccard index, and exemplar index.

jaccard <- jaccard_raw %>%
  dplyr::arrange(., Group, Exemplar.Row, desc(Jaccard))

Let’s add a Jaccard mean and median by Exemplar.Row.

jaccard_aug <- jaccard %>%
  dplyr::group_by(., Group, Exemplar.Row) %>%
  dplyr::mutate(.,
    j_mean = mean(Jaccard),
    j_med = median(Jaccard),
    j_max = max(Jaccard),
    j_min = min(Jaccard)
  )

Make edge, node tibbles

p1 <- jaccard %>%
  dplyr::filter(., Group == "P1")

p1_edges <- tibble(from = p1$Exemplar.Row,
                   to = p1$Exemplar.Col,
                   weight = p1$Jaccard)
p1_nodes <- tibble(id = 1:20)

Make network

p1_network <- network::network(p1_edges, vertex.attr = p1_nodes, 
                      matrix.type = "edgelist", ignore.eval = FALSE,
                      directed = FALSE)

Plotting

plot(p1_network, vertex.cex = 3, mode='circle')

Let’s pick the top ten strongest connections.

p1_tidy <- tidygraph::tbl_graph(nodes = p1_nodes, edges = p1_edges,
                                directed = FALSE)

ggraph::ggraph(p1_tidy) + geom_edge_link() + geom_node_point() + theme_graph()
## Using `stress` as default layout

ggraph(p1_tidy, layout = "graphopt") + 
  geom_node_point() +
  geom_edge_link(aes(width = weight), alpha = 0.8) + 
  scale_edge_width(range = c(0.2, 2)) +
  geom_node_text(aes(label = id), repel = TRUE) +
  labs(edge_width = "Jaccard") +
  theme_graph()

ggraph(p1_tidy, layout = "linear") + 
  geom_edge_arc(aes(width = weight), alpha = 0.8) + 
  scale_edge_width(range = c(0.2, 2)) +
  geom_node_text(aes(label = id)) +
  labs(edge_width = "Jaccard") +
  theme_graph()

Let’s pick the top two exemplars to plot.

p1_e8 <- p1_tidy %>%
  activate(edges) %>%
  dplyr::filter(., from == 8)

ggraph(p1_e8, layout = "linear") + 
  geom_edge_arc(aes(width = weight), alpha = 0.8) + 
  scale_edge_width(range = c(0.2, 2)) +
  geom_node_text(aes(label = id)) +
  labs(edge_width = "Jaccard") +
  theme_graph()

p1_e10 <- p1_tidy %>%
  activate(edges) %>%
  dplyr::filter(., from == 10 | to == 10)

ggraph(p1_e10, layout = "linear") + 
  geom_edge_arc(aes(width = weight, color = weight), alpha = 0.8) + 
  scale_edge_width(range = c(0.2, 2)) +
  geom_node_text(aes(label = id)) +
  labs(edge_width = "Jaccard") +
  theme_graph()

p1_e10 <- p1_tidy %>%
  activate(edges) %>%
  dplyr::filter(., from == 10 | to == 10)

ggraph(p1_e10, layout = "graphopt") + 
  geom_edge_link(aes(width = weight, color = weight), alpha = 0.8) + 
  scale_edge_width(range = c(0.2, 2)) +
  geom_node_text(aes(label = id)) +
  labs(edge_width = "Jaccard") +
  theme_graph()

p1_selected <- p1_tidy %>%
  activate(edges) %>%
  dplyr::filter(., from == 8 | to == 8)

ggraph(p1_selected, layout = "graphopt") + 
  geom_edge_link(aes(width = weight, color = weight), alpha = 0.8) + 
  scale_edge_width(range = c(0.2, 2)) +
  geom_node_text(aes(label = id)) +
  labs(edge_width = "Jaccard") +
  theme_graph()

p1_selected <- p1_tidy %>%
  activate(edges) %>%
  dplyr::filter(., from == 8 | to == 8)

ggraph(p1_selected, layout = "linear") + 
  geom_edge_arc(aes(width = weight, color = weight), alpha = 0.8) + 
  scale_edge_width(range = c(0.2, 2)) +
  geom_node_text(aes(label = id)) +
  labs(edge_width = "Jaccard") +
  theme_graph()

p1_selected <- p1_tidy %>%
  activate(edges) %>%
  dplyr::filter(., from == 10 | to == 10)

ggraph(p1_selected, layout = "linear") + 
  geom_edge_arc(aes(width = weight, color = weight), alpha = 0.8) + 
  scale_edge_width(range = c(0.2, 2)) +
  geom_node_text(aes(label = id)) +
  labs(edge_width = "Jaccard") +
  theme_graph()

jaccard_aug %>% 
  dplyr::filter(., Group == "P1") %>%
  dplyr::arrange(., desc(j_mean))
## # A tibble: 190 x 8
## # Groups:   Group, Exemplar.Row [19]
##    Exemplar.Row Exemplar.Col Jaccard Group j_mean j_med j_max  j_min
##           <dbl>        <dbl>   <dbl> <chr>  <dbl> <dbl> <dbl>  <dbl>
##  1           19           20  0.32   P1     0.32  0.32  0.32  0.32  
##  2           16           20  0.375  P1     0.244 0.26  0.375 0.0820
##  3           16           17  0.32   P1     0.244 0.26  0.375 0.0820
##  4           16           19  0.2    P1     0.244 0.26  0.375 0.0820
##  5           16           18  0.0820 P1     0.244 0.26  0.375 0.0820
##  6           10           16  0.404  P1     0.237 0.222 0.404 0.0820
##  7           10           15  0.347  P1     0.237 0.222 0.404 0.0820
##  8           10           20  0.347  P1     0.237 0.222 0.404 0.0820
##  9           10           19  0.269  P1     0.237 0.222 0.404 0.0820
## 10           10           12  0.222  P1     0.237 0.222 0.404 0.0820
## # … with 180 more rows

It looks like exemplars 19 and 16 are are among the highest.

jaccard_aug %>% 
  dplyr::filter(., Group == "P1") %>%
  dplyr::arrange(., j_mean)
## # A tibble: 190 x 8
## # Groups:   Group, Exemplar.Row [19]
##    Exemplar.Row Exemplar.Col Jaccard Group j_mean j_med j_max  j_min
##           <dbl>        <dbl>   <dbl> <chr>  <dbl> <dbl> <dbl>  <dbl>
##  1           18           19   0.1   P1     0.1   0.1   0.1   0.1   
##  2           18           20   0.1   P1     0.1   0.1   0.1   0.1   
##  3           11           13   0.226 P1     0.168 0.182 0.226 0.0833
##  4           11           17   0.226 P1     0.168 0.182 0.226 0.0833
##  5           11           14   0.204 P1     0.168 0.182 0.226 0.0833
##  6           11           15   0.182 P1     0.168 0.182 0.226 0.0833
##  7           11           18   0.182 P1     0.168 0.182 0.226 0.0833
##  8           11           20   0.182 P1     0.168 0.182 0.226 0.0833
##  9           11           16   0.121 P1     0.168 0.182 0.226 0.0833
## 10           11           12   0.102 P1     0.168 0.182 0.226 0.0833
## # … with 180 more rows

18 and 11 among the lowest

p1_selected <- p1_tidy %>%
  activate(edges) %>%
  dplyr::filter(., from == 18 | to == 18)

ggraph(p1_selected, layout = "linear") + 
  geom_edge_arc(aes(width = weight, color = weight), alpha = 0.8) + 
  scale_edge_width(range = c(0.2, 2)) +
  geom_node_text(aes(label = id)) +
  labs(edge_width = "Jaccard") +
  theme_graph()

p1_selected <- p1_tidy %>%
  activate(edges) %>%
  dplyr::filter(., from == 19 | to == 19)

g <- ggraph(p1_selected, layout = "linear") + 
  geom_edge_arc(aes(width = weight, color = weight), alpha = 0.8) + 
  scale_edge_width(range = c(0.1, 4), limits = c(0, .6)) +
  geom_node_text(aes(label = id)) +
  labs(edge_width = "Jaccard") +
  theme_graph()

g

p1_selected <- p1_tidy %>%
  activate(edges) %>%
  dplyr::filter(., from == 19 | to == 19)

p1_selected <- p1_selected %>%
  dplyr::mutate(weight = cut(weight, c(0, .1, .2, .3, .4, .5, .6)))

g <- ggraph(p1_selected, layout = "linear", circular = TRUE) + 
  geom_edge_arc(aes(color = factor(weight))) + 
  geom_node_text(aes(label = id)) +
  labs(edge_width = "Jaccard") +
  theme_graph()

g

plot_jaccard_vals <- function(df, exemplar_id, group, mean_j = NA) {
  df <- df %>%
    activate(edges) %>%
    dplyr::filter(., from == exemplar_id | to == exemplar_id) %>%
    dplyr::mutate(weight = cut(weight, c(0, .1, .2, .3, .4, .5, .6), labels = c("<.1", ".1-.2", ".2-.3", ".3-.4", ".4-.5", ".5-.6")))
  
  ggraph(df, layout = "linear", circular = TRUE) +
    geom_edge_arc(aes(color = weight)) +
    geom_node_text(aes(label = id)) +
    ggtitle(paste0(group, " | Exemplar ", exemplar_id, " | mean Jaccard ", mean_j)) +
    theme_graph()
}

plot_jaccard_vals(p1_tidy, 11, "P1")

P1

Highest mean Jaccard

plot_jaccard_vals(p1_tidy, 19, "P1")

plot_jaccard_vals(p1_tidy, 16, "P1")

Lowest mean Jaccard

plot_jaccard_vals(p1_tidy, 18, "P1")

plot_jaccard_vals(p1_tidy, 11, "P1")

Define functions to select wallpaper group

wp_graph <- function(df, group) {
  out_df <- df %>%
    dplyr::filter(., Group == group)

  df_edges <- tibble(from = out_df$Exemplar.Row,
                     to = out_df$Exemplar.Col,
                     weight = out_df$Jaccard)
  
  df_nodes <- tibble(id = 1:20)
 
  tidygraph::tbl_graph(nodes = df_nodes, 
                       edges = df_edges,
                       directed = FALSE)
}
jaccard_stats <- function(jaccard) {
  jaccard %>%
  dplyr::group_by(., Group, Exemplar.Row) %>%
    dplyr::mutate(.,
      j_mean = mean(Jaccard),
      j_med = median(Jaccard),
      j_max = max(Jaccard),
      j_min = min(Jaccard)
    ) %>%
    dplyr::summarise(.,
                     Jaccard_mean = mean(j_mean),
                     Jaccard_med = mean(j_med),
                     Jaccard_max = mean(j_max),
                     Jaccard_min = mean(j_min) )
}
graph <- wp_graph(jaccard, "P31M")
j_stats <- jaccard_stats(jaccard_raw)
## `summarise()` has grouped output by 'Group'. You can override using the `.groups` argument.
plot_jaccard_vals(graph, 18, "P31M")

pick_extreme_mean_exemplars <- function(j_stats, group, hi_lo = "hi", n_exemplars = 1) {
  this_group <- j_stats %>%
    dplyr::filter(., Group == group)
  
  if (hi_lo == "hi") {
    this_group <- this_group %>%
      dplyr::arrange(., desc(Jaccard_mean))
  } else {
    this_group <- this_group %>%
      dplyr::arrange(., Jaccard_mean)
  }
  
  this_group$Exemplar.Row[1:n_exemplars]
}

pick_extreme_max_exemplars <- function(j_stats, group, hi_lo = "hi", n_exemplars = 1) {
  this_group <- j_stats %>%
    dplyr::filter(., Group == group)
  
  if (hi_lo == "hi") {
    this_group <- this_group %>%
      dplyr::arrange(., desc(Jaccard_max))
  } else {
    this_group <- this_group %>%
      dplyr::arrange(., Jaccard_max)
  }
  
  this_group$Exemplar.Row[1:n_exemplars]
}

pick_extreme_min_exemplars <- function(j_stats, group, hi_lo = "hi", n_exemplars = 1) {
  this_group <- j_stats %>%
    dplyr::filter(., Group == group)
  
  if (hi_lo == "hi") {
    this_group <- this_group %>%
      dplyr::arrange(., desc(Jaccard_min))
  } else {
    this_group <- this_group %>%
      dplyr::arrange(., Jaccard_min)
  }
  
  this_group$Exemplar.Row[1:n_exemplars]
}

Test functions

jaccard <- jaccard_raw %>%
  dplyr::arrange(., Group, Exemplar.Row, desc(Jaccard))

j_stats <- jaccard_stats(jaccard_raw)
## `summarise()` has grouped output by 'Group'. You can override using the `.groups` argument.
this_group = "P31M"
graph <- wp_graph(jaccard, this_group)

hi_1_max <- pick_extreme_max_exemplars(j_stats, this_group, hi_lo = "hi")
lo_1_max <- pick_extreme_max_exemplars(j_stats, this_group, hi_lo = "lo")

(hi_max_j <- j_stats$Jaccard_mean[hi_1_max])
## [1] 0.190652
(lo_max_j <- j_stats$Jaccard_mean[lo_1_max])
## [1] 0.32
plot_jaccard_vals(graph, hi_1_max, this_group, hi_max_j)

plot_jaccard_vals(graph, lo_1_max, this_group, lo_max_j)